<!DOCTYPE html>
<html lang="en">

  <head>

    <!-- Character encoding is declared first so the parser sees it before any text content. -->
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
    <meta name="description" content="Excalibur, PDF, Parsing, Table, Extraction, OCR">
    <!-- One comma-separated entry per author; first and last name are separated by a space. -->
    <meta name="author" content="Vinayak Mehta, Nikhil Sikka">

    <title>Excalibur | PDF Table Extraction for Humans</title>

    <!-- Bootstrap 4.1.3 core stylesheet from cdnjs, pinned with a Subresource Integrity hash -->
    <link href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/4.1.3/css/bootstrap.min.css" rel="stylesheet" integrity="sha256-eSi1q2PG6J7g7ib17yAaWMcrr5GrtohYChqibrV7PBE=" crossorigin="anonymous">

    <!-- Font Awesome 5.5.0 icon styles, also pinned with a Subresource Integrity hash -->
    <link href="https://use.fontawesome.com/releases/v5.5.0/css/all.css" rel="stylesheet" integrity="sha384-B4dIYHKNBt8Bc12p+WXckhzcICo0wtJAoU8YZTY5qE0Id1GSseTk6S+L3BlXeVIU" crossorigin="anonymous">

    <!-- Google web fonts: Montserrat and Lato -->
    <link href="https://fonts.googleapis.com/css?family=Montserrat:400,700" rel="stylesheet">
    <link href="https://fonts.googleapis.com/css?family=Lato:400,700,400italic,700italic" rel="stylesheet">

    <!-- Site-specific styles for this page -->
    <link href="css/index.min.css" rel="stylesheet">

    <!--
      Start of Async Drift Code
      Vendor bootstrap snippet for Drift. Unless Drift is already initialised
      (t.init), it makes window.drift / window.driftt an array and, for every
      name in t.methods, installs a stub that pushes [methodName, ...arguments]
      onto that array and returns the array. drift.load(id) then inserts an
      async script element before the first script in the document, with the URL
      https://js.driftt.com/include/{timestamp}/{id}.js, where {timestamp} is
      the current time rounded up to a multiple of 3e5 ms (5 minutes).
      NOTE(review): the snippet assigns o.crossorigin (lowercase). The DOM
      property on script elements is crossOrigin, so this assignment probably
      has no effect. Confirm against Drift's current snippet before changing
      it, because enabling CORS mode could stop the script from loading.
    -->
    <script>
      "use strict";

      !function() {
        var t = window.driftt = window.drift = window.driftt || [];
        if (!t.init) {
          if (t.invoked) return void (window.console && console.error && console.error("Drift snippet included twice."));
          t.invoked = !0, t.methods = [ "identify", "config", "track", "reset", "debug", "show", "ping", "page", "hide", "off", "on" ],
          t.factory = function(e) {
            return function() {
              var n = Array.prototype.slice.call(arguments);
              return n.unshift(e), t.push(n), t;
            };
          }, t.methods.forEach(function(e) {
            t[e] = t.factory(e);
          }), t.load = function(t) {
            var e = 3e5, n = Math.ceil(new Date() / e) * e, o = document.createElement("script");
            o.type = "text/javascript", o.async = !0, o.crossorigin = "anonymous", o.src = "https://js.driftt.com/include/" + n + "/" + t + ".js";
            var i = document.getElementsByTagName("script")[0];
            i.parentNode.insertBefore(o, i);
          };
        }
      }();
      drift.SNIPPET_VERSION = '0.3.1';
      drift.load('mkxyurrr8tf3');
      </script>
      <!-- End of Async Drift Code -->

  </head>

  <body id="page-top">

    <!-- Primary navigation: fixed top bar whose links collapse behind the Menu toggle below the lg breakpoint -->
    <nav id="mainNav" class="navbar navbar-expand-lg bg-secondary fixed-top text-uppercase">
      <div class="container">
        <a href="#page-top" class="navbar-brand js-scroll-trigger">Excalibur</a>
        <!-- Toggle button for the collapsible link list identified by data-target -->
        <button
          type="button"
          class="navbar-toggler navbar-toggler-right text-uppercase bg-primary text-white rounded"
          data-toggle="collapse"
          data-target="#navbarResponsive"
          aria-controls="navbarResponsive"
          aria-expanded="false"
          aria-label="Toggle navigation"
        >
          Menu
          <i class="fas fa-bars"></i>
        </button>
        <!-- In-page section links -->
        <div id="navbarResponsive" class="collapse navbar-collapse">
          <ul class="navbar-nav ml-auto">
            <li class="nav-item mx-0 mx-lg-1">
              <a href="#about" class="nav-link py-3 px-0 px-lg-3 rounded js-scroll-trigger">About</a>
            </li>
            <li class="nav-item mx-0 mx-lg-1">
              <a href="#usage" class="nav-link py-3 px-0 px-lg-3 rounded js-scroll-trigger">Usage</a>
            </li>
            <li class="nav-item mx-0 mx-lg-1">
              <a href="#contact" class="nav-link py-3 px-0 px-lg-3 rounded js-scroll-trigger">Contact</a>
            </li>
          </ul>
        </div>
      </div>
    </nav>

    <!-- Header: branding column plus install / download column (side by side from the md breakpoint up) -->
    <header class="masthead bg-primary text-white">
      <div class="container">
        <div class="row">
          <!-- Bootstrap 4 has no "xs" infix; col-12 is the full-width class for the smallest breakpoint. -->
          <div class="col-12 col-md-6 text-center">
            <!-- No height attribute: "auto" is not a valid value for it, and img-fluid already scales the height. -->
            <img src="https://excalibur-py.readthedocs.io/en/master/_static/excalibur-logo-circle.png" class="rounded-circle img-fluid mb-5 d-block mx-auto"
              width="150" alt="Excalibur">
            <hr class="star-light">
            <h1 class="text-uppercase mb-1 lead">Excalibur</h1>
            <h4 class="mb-4 mt-3">Extract tables from PDFs into CSVs</h4>
            <a class="github-button" href="https://github.com/camelot-dev/excalibur" data-size="large" data-show-count="true" aria-label="Star camelot-dev/excalibur on GitHub">Star</a>
          </div>
          <div class="col-12 col-md-5 offset-sm-0 offset-md-1 mt-4">
            <div class="text-center">
              <h5 class="mb-1">Available for Windows, Mac and Linux</h5>
              <p class="lead">Excalibur can be easily installed using pip.</p>
              <p class="package-header__pip-instructions">
                <span id="pip-command">pip install excalibur-py</span>
                <!-- Explicit type="button" so the control never acts as a submit button; it copies the text of #pip-command. -->
                <button type="button" class="-js-copy-pip-command tooltipped tooltipped-s" data-clipboard-target="#pip-command" aria-label="Copy to clipboard" data-original-label="Copy to clipboard"><i class="fa fa-copy" aria-hidden="true"></i></button>
              </p>
            </div>
            <div class="text-center">
              <p class="lead">Or run directly with the executable!</p>
              <!-- rel="noopener" keeps the newly opened tab from reaching this page through window.opener. -->
              <a class="btn btn-secondary btn-xl" href="https://github.com/camelot-dev/excalibur/releases" target="_blank" rel="noopener">
                <i class="fas fa-download mr-2" aria-hidden="true"></i>
                Download Now!
              </a>
            </div>
          </div>
        </div>
      </div>
    </header>

    <!-- About Section: four media objects, each an icon next to a titled paragraph -->
    <section class="text-secondary mb-0" id="about">
      <div class="container-fluid">
        <h2 class="text-center text-uppercase text-secondary">About</h2>
        <hr class="star-dark mb-5">
      </div>
      <div class="container">
        <div class="row">
          <div class="col-lg-6">
            <div class="media mt-4">
              <!-- The h3 only sizes a decorative icon; aria-hidden keeps it from being announced as an empty heading. -->
              <h3 aria-hidden="true"><i class="fas fa-file-pdf text-accent mr-3"></i></h3>
              <div class="media-body">
                <h5 class="mb-1 text-accent">The Portable Document Format</h5>
                <p class="lead text-helper">A PDF file defines instructions to place characters at precise <strong class="font-weight-bold">x,y</strong> coordinates relative to the bottom-left corner of the page. Words are simulated by placing some characters closer than others. Spaces are simulated by placing words relatively far apart. And finally tables are simulated by placing words as they would appear in a spreadsheet. The format has no internal representation of a table structure.</p>
              </div>
            </div>
          </div>
          <div class="col-lg-6">
            <div class="media mt-4">
              <h3 aria-hidden="true"><i class="fa fa-table text-accent mr-3"></i></h3>
              <div class="media-body">
                <h5 class="mb-1 text-accent">Extracting tables from PDFs is hard</h5>
                <p class="lead text-helper">The Portable Document Format was not designed for tabular data. Sadly, a lot of open data is shared as PDFs and getting tables out for analysis is a pain. A simple copy-and-paste doesn't work. <strong class="font-weight-bold">Excalibur makes PDF table extraction very easy</strong>, by automatically detecting tables in PDFs and letting you save them into CSVs and Excel files through a web interface.</p>
              </div>
            </div>
          </div>
          <div class="col-lg-6">
            <div class="media mt-4">
              <h3 aria-hidden="true"><i class="fa fa-wrench text-accent mr-3"></i></h3>
              <div class="media-body">
                <h5 class="mb-1 text-accent">Why another tool?</h5>
                <!-- Links that open a new tab carry rel="noopener" so the opened page cannot reach this one through window.opener. -->
                <p class="lead text-helper">There are both open and closed-source tools that are widely used for PDF table extraction. They either give a nice output or fail miserably. Excalibur is powered by <a href="https://camelot-py.readthedocs.io">Camelot</a> which gives users additional settings to tweak table extraction and get the best results. You can see how it performs better than other open-source tools and libraries <a href="https://github.com/socialcopsdev/camelot/wiki/Comparison-with-other-PDF-Table-Extraction-libraries-and-tools" target="_blank" rel="noopener">in this comparison</a>.</p>
              </div>
            </div>
          </div>
          <div class="col-lg-6">
            <div class="media mt-4">
              <h3 aria-hidden="true"><i class="fa fa-rocket text-accent mr-3"></i></h3>
              <div class="media-body">
                <h5 class="mb-1 text-accent">Secure and built for scale</h5>
                <p class="lead text-helper">You get complete control over your data, since all file storage and processing happens on your own local or remote machine. Excalibur can also be configured with MySQL and <a href="http://www.celeryproject.org/" target="_blank" rel="noopener">Celery</a> to execute table extraction jobs in a parallel and distributed manner. By default, jobs are executed sequentially.</p>
              </div>
            </div>
          </div>
        </div>
      </div>
    </section>

    <!--
      Usage Section: five image + caption rows.
      The images are no longer wrapped in href="#" links, which had no accessible
      name and only jumped to the top of the page. Alt text is derived from each
      caption and file name. Every row now carries one order-lg-first and one
      order-lg-last column; rows 3 and 5 previously marked both columns
      order-lg-first, which rendered in source order (image, then text).
    -->
    <section class="bg-primary features" id="usage">
      <div class="container">
        <h2 class="text-center text-uppercase text-white mb-0">Usage</h2>
        <hr class="star-light mb-5">
        <div class="row align-items-center mb-5 text-white">
          <div class="col-md-6 order-lg-first">
            <img class="img-fluid rounded mb-3 mb-md-0" src="https://excalibur-py.readthedocs.io/en/master/_static/gifs/upload.gif" alt="Animation of uploading a PDF through the Excalibur web interface">
          </div>
          <div class="col-md-6 order-lg-last">
            <h3>Upload a PDF</h3>
            <p>You can upload a PDF using the web interface. You can also interact with previous uploads.</p>
          </div>
        </div>
        <div class="row align-items-center mb-5 text-white">
          <div class="col-md-6 order-lg-last">
            <img class="img-fluid rounded mb-3 mb-md-0" src="https://excalibur-py.readthedocs.io/en/master/_static/gifs/auto-detect.gif" alt="Animation of Excalibur automatically detecting tables in a PDF">
          </div>
          <div class="col-md-6 order-lg-first">
            <h3>Autodetect tables</h3>
            <p>Excalibur can automatically detect tables in your PDF.</p>
          </div>
        </div>
        <div class="row align-items-center mb-5 text-white">
          <div class="col-md-6 order-lg-first">
            <img class="img-fluid rounded mb-3 mb-md-0" src="https://excalibur-py.readthedocs.io/en/master/_static/gifs/table-and-column.gif" alt="Animation of drawing table areas and column separators on a PDF page">
          </div>
          <div class="col-md-6 order-lg-last">
            <h3>Or draw table areas and/or column separators</h3>
            <p>You can guide the tool by drawing table areas and column separators in cases where the tables are buried deep inside the text and autodetection fails.</p>
          </div>
        </div>
        <div class="row align-items-center mb-5 text-white">
          <div class="col-md-6 order-lg-last">
            <img class="img-fluid rounded mb-3 mb-md-0" src="https://excalibur-py.readthedocs.io/en/master/_static/gifs/saved-rule.gif" alt="Animation of applying saved extraction settings to a PDF">
          </div>
          <div class="col-md-6 order-lg-first">
            <h3>Or load saved settings</h3>
            <p>You can save table extraction settings for a PDF once, and apply them on new PDFs to extract tables with similar structures.</p>
          </div>
        </div>
        <div class="row align-items-center mb-5 text-white">
          <div class="col-md-6 order-lg-first">
            <img class="img-fluid rounded mb-3 mb-md-0" src="https://excalibur-py.readthedocs.io/en/master/_static/gifs/download.gif" alt="Animation of viewing extracted tables and downloading the data">
          </div>
          <div class="col-md-6 order-lg-last">
            <h3>View and download data</h3>
            <p>Finally, you can view the extracted tables and download them as CSVs or Excel files. Excalibur also supports JSON and HTML.</p>
          </div>
        </div>
      </div>
    </section>

    <!-- Contact Section: feedback form posted to Formspree -->
    <section id="contact">
      <div class="container">
        <h2 class="text-center text-uppercase text-secondary">Contact</h2>
        <hr class="star-dark mb-5">
      </div>
      <div class="container">
        <div class="row">
          <div class="col-md-3"></div>
          <div class="col-md-6">
            <h4 class="lead">Do you have feedback or want us to build a new feature? Just holler!</h4>
            <hr>
            <form action="https://formspree.io/vmehta94@gmail.com" method="POST">
              <!-- A label's "for" must equal the control's id (not its name); it previously pointed at "emailID", which no element has. -->
              <label for="email">E-mail</label>
              <input class="form-control form-control-lg mb-3" type="email" name="emailID" id="email" autocomplete="email" placeholder="example@domain.com">

              <label for="comment">Message</label>
              <textarea class="form-control form-control-lg mb-3" name="comment" id="comment" cols="30" rows="5" placeholder="Give us feedback, tell us about your awesome use case or just say hello!"></textarea>

              <div class="text-right">
                <button type="submit" value="Send" class="btn btn-lg btn-block btn-accent">Submit</button>
              </div>
            </form>
          </div>
        </div>
      </div>
    </section>

    <!-- Footer: links to the project's GitHub repository -->
    <footer class="footer text-center">
      <div class="container">
        <div class="row">
          <div class="col-md-12">
            <ul class="list-inline mb-0">
              <li class="list-inline-item">
                <!-- Icon-only link: aria-label supplies the accessible name and the icon itself is hidden from assistive technology. -->
                <a class="btn btn-outline-light btn-social text-center rounded-circle" href="https://github.com/camelot-dev/excalibur" target="_blank" rel="noopener" aria-label="Excalibur on GitHub">
                  <i class="fab fa-fw fa-github" aria-hidden="true"></i>
                </a>
              </li>
            </ul>
            <!-- rel="noopener" keeps the newly opened tab from reaching this page through window.opener. -->
            <a class="mt-2 d-inline-block text-white" href="https://github.com/camelot-dev/excalibur" target="_blank" rel="noopener">View on Github</a>
          </div>
        </div>
      </div>
    </footer>

    <!-- Copyright strip. <small> sits inside <p>: a paragraph is not permitted as a child of <small>. -->
    <div class="copyright py-4 text-center text-white">
      <div class="container">
        <p class="mb-2"><small>Copyright <a href="https://github.com/camelot-dev/" target="_blank" rel="noopener">&copy; Camelot Dev</a> 2018</small></p>
        <!-- The heart icon is decorative; the sr-only text gives screen readers the word it stands for. -->
        <p><small>Made with <i class="fa fa-heart mx-1" aria-hidden="true"></i><span class="sr-only">love</span> in New Delhi, India</small></p>
      </div>
    </div>

    <!-- Scroll to Top Button (hidden from the lg breakpoint up via d-lg-none) -->
    <div class="scroll-to-top d-lg-none position-fixed">
      <!-- Icon-only link: aria-label supplies the accessible name. -->
      <a class="js-scroll-trigger d-block text-center text-white rounded" href="#page-top" aria-label="Scroll to top">
        <i class="fa fa-chevron-up" aria-hidden="true"></i>
      </a>
    </div>

    <!--
      Bootstrap core JavaScript: jQuery 3.3.1 first, then the Bootstrap 4.1.3 bundle.
      NOTE(review): unlike the stylesheet links in the head, none of the CDN script
      tags below carry integrity / crossorigin attributes. Consider adding SRI
      hashes taken from the CDN's own listing.
    -->
    <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/4.1.3/js/bootstrap.bundle.min.js"></script>

    <!-- jQuery Easing plugin 1.4.1 -->
    <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery-easing/1.4.1/jquery.easing.min.js"></script>

    <!-- clipboard.js 2.0.0; presumably drives the copy button that carries data-clipboard-target (confirm in js/main.min.js) -->
    <script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.0/clipboard.min.js"></script>

    <!-- GitHub buttons script, loaded with async and defer; presumably renders the .github-button link in the header (confirm) -->
    <script async defer src="https://buttons.github.io/buttons.js"></script>

    <!-- Site-specific script for this page, loaded after the libraries above -->
    <script src="js/main.min.js"></script>

  </body>

</html>
